We are interested in the impact different DIF-flagging scenarios have on the test level. That means, how would removing the DIF items impact the drawn conclusions, item parameters, ICC, etc.
For this, we compare the: - Baseline model = same item parameters are assumed across all 3 countries. - Corrected model = flagged items use country specific parameters. - Purified model = flagged items are removed.
These models were created in the previous document based on the following workflow:
multiverseDIF() → Estimate Corrected Models. These models have been "corrected" for DIF (using either item parameters of DIF items re-estimated in country samples, or the US baseline sample).
Comparison of Baseline and corrected models
# Preallocate a list to store one result row per specification in `out`:
# the sum of squared differences between baseline and corrected thetas, and
# between baseline and purified thetas.
theta_diff_results <- vector("list", length(out))
for (i in seq_along(out)) {
# Calculate the difference between theta_baseline and theta_corrected
theta_diff_baseline_corrected <- out[[i]]$theta_baseline - out[[i]]$theta_corrected
# Calculate the sum of squared differences
theta_diff_baseline_corrected_ss <- sum(theta_diff_baseline_corrected^2)
# Calculate the difference between theta_baseline and theta_purified
theta_diff_baseline_purified <- out[[i]]$theta_baseline - out[[i]]$theta_purified
# Calculate the sum of squared differences
theta_diff_baseline_purified_ss <- sum(theta_diff_baseline_purified^2)
# Store one data.frame row per specification (combined with rbind below)
theta_diff_results[[i]] <- data.frame(i = i,
theta_diff_baseline_corrected_ss = theta_diff_baseline_corrected_ss,
theta_diff_baseline_purified_ss = theta_diff_baseline_purified_ss
)
}
# Combine the per-specification results and bind them to specifications_results
specifications_results2 <- cbind(specifications_results, do.call(rbind, theta_diff_results)) %>% relocate(k_flagged, .after = theta_diff_baseline_purified_ss) %>% dplyr::select(-X)# View the first few rows of the dataframe
# Indices (column i) of the 10 specifications with the largest
# baseline-vs-corrected sum of squared theta differences
theta_top_10_differences_corrected <- specifications_results2 %>%
arrange(desc(theta_diff_baseline_corrected_ss)) %>% #TODO effect size for x axis
head(10) %>%
pull(i)
# Indices (column i) of the 10 specifications with the largest
# baseline-vs-purified sum of squared theta differences
theta_top_10_differences_purified <- specifications_results2 %>%
arrange(desc(theta_diff_baseline_purified_ss)) %>%
head(10) %>%
pull(i) wf_1_comparison hf_1_correction hf_2_parameter_vector hf_3_criterion
1 usa-ger-arg age parameters_promis lr
2 usa-ger-arg age parameters_promis lr_ben
3 usa-ger-arg age parameters_promis lr
4 usa-ger-arg age parameters_promis lr_bon
5 usa-ger-arg age parameters_promis lr_ben
6 usa-ger-arg no parameters_promis lr_bon
7 usa-ger-arg age parameters_multigroup lr
8 usa-ger-arg age parameters_multigroup lr_ben
9 usa-ger-arg age parameters_multigroup lr
10 usa-ger-arg age parameters_multigroup lr_bon
hf_4_threshold
1 0.01
2 0.01
3 0.05
4 0.05
5 0.05
6 0.05
7 0.01
8 0.01
9 0.05
10 0.05
Flagged_items
1 PFM1-PFM2-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM37-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
2 PFM1-PFM2-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM37-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
3 PFM1-PFM2-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM37-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
4 PFM1-PFM2-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM37-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
5 PFM1-PFM2-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM37-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
6 PFM1-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM25-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
7 PFM1-PFM2-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM37-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
8 PFM1-PFM2-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM37-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
9 PFM1-PFM2-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM37-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
10 PFM1-PFM2-PFM3-PFM4-PFM6-PFM7-PFM9-PFM10-PFM12-PFM15-PFM16-PFM17-PFM18-PFM19-PFM21-PFM23-PFM26-PFM27-PFM28-PFM29-PFM32-PFM33-PFM34-PFM35-PFM36-PFM37-PFM38-PFM40-PFM43-PFM44-PFM46-PFM49-PFM51-PFM53
i theta_diff_baseline_corrected_ss theta_diff_baseline_purified_ss
1 9 15.624736173 2040.209
2 41 15.624736173 2040.209
3 153 15.624736173 2040.209
4 169 15.624736173 2040.209
5 185 15.624736173 2040.209
6 173 39.668732447 1415.274
7 1 0.002550601 1247.642
8 33 0.002550601 1247.642
9 145 0.002550601 1247.642
10 161 0.002550601 1247.642
k_flagged
1 34
2 34
3 34
4 34
5 34
6 33
7 34
8 34
9 34
10 34
baseline: baseline model either based on PROMIS or multigroup parameters
corrected: a MIRT model, where item parameters are estimated freely for flagged items (should be IRT_model, is it?)
purified: model with flagged items removed
specifications_results %>%
dplyr::mutate(row = dplyr::row_number()) %>%
filter(
wf_1_comparison == "usa-ger-arg" &
hf_1_correction == "age"&
hf_2_parameter_vector == "parameters_multigroup",
str_detect(hf_3_criterion, "Cox")) X wf_1_comparison hf_1_correction hf_2_parameter_vector hf_3_criterion
1 49 usa-ger-arg age parameters_multigroup CoxSnell
2 97 usa-ger-arg age parameters_multigroup CoxSnell
3 209 usa-ger-arg age parameters_multigroup CoxSnell
hf_4_threshold k_flagged Flagged_items row
1 0.02 4 PFM16-PFM33-PFM46-PFM51 49
2 0.03 3 PFM16-PFM33-PFM46 97
3 0.05 1 PFM33 209
# Find the indices of specifications in the global `out` list whose final
# flagged-item set AND specification settings exactly match the given values.
#
# removed_items: character vector of item names; must be supplied sorted,
#                because each specification's flagged items are sorted before
#                the identical() comparison.
# wf_1_comparison, hf_1_correction, hf_2_parameter_vector, hf_3_criterion,
# hf_4_threshold: scalar values matched against the corresponding fields of
#                 each out[[i]]$specification.
# Returns an integer vector of matching indices (possibly empty).
specification_finder <- function(removed_items,
                                 wf_1_comparison,
                                 hf_1_correction,
                                 hf_2_parameter_vector,
                                 hf_3_criterion,
                                 hf_4_threshold) {
  matches <- vapply(out, function(o) {
    # Sort so the comparison is order-independent
    flagged_sorted <- sort(o$analysis_results$`last iteration`$Flagged_items)
    identical(flagged_sorted, removed_items) &&
      wf_1_comparison %in% o$specification$wf_1_comparison &&
      hf_1_correction %in% o$specification$hf_1_correction &&
      hf_2_parameter_vector %in% o$specification$hf_2_parameter_vector &&
      hf_3_criterion %in% o$specification$hf_3_criterion &&
      hf_4_threshold %in% o$specification$hf_4_threshold
  }, logical(1))
  which(matches)
}
# Locate the specification whose flagged items are exactly
# PFM16/PFM33/PFM46/PFM51 under: no age correction, PROMIS parameter vector,
# Cox & Snell criterion, threshold 0.02
spec_promis <- specification_finder(removed_items = c( "PFM16", "PFM33", "PFM46", "PFM51"),
wf_1_comparison = "usa-ger-arg",
hf_1_correction = "no",
hf_2_parameter_vector = "parameters_promis",
hf_3_criterion = "CoxSnell",
hf_4_threshold = 0.02)
# Plot title for the selected specification: comparison, parameter vector,
# and the flagged items of the last DIF iteration
title_spec_promis <- paste(out[[spec_promis]]$specification %>%
with(paste0("Comparison: ", toupper(wf_1_comparison), "; Parameters: PROMIS")),
"; Flagged Items:", paste0(out[[spec_promis]]$analysis_results$`last iteration`$Flagged_items, collapse = ", "))
title_spec_promis[1] "Comparison: USA-GER-ARG; Parameters: PROMIS ; Flagged Items: PFM16, PFM33, PFM46, PFM51"
# Find the indices of ALL specifications in the global `out` list whose final
# flagged-item set matches `removed_items`, ignoring the specification
# settings (unlike specification_finder(), which also matches those).
#
# removed_items: character vector of item names; must be supplied sorted,
#                because flagged items are sorted before comparison.
# Returns an integer vector of matching indices (possibly empty).
specification_finder2 <- function(removed_items) {
  matches <- vapply(out, function(o) {
    identical(sort(o$analysis_results$`last iteration`$Flagged_items),
              removed_items)
  }, logical(1))
  which(matches)
}
specs_numbers <- specification_finder2(removed_items = c("PFM16", "PFM33", "PFM46", "PFM51")) X wf_1_comparison hf_1_correction hf_2_parameter_vector hf_3_criterion
49 49 usa-ger-arg age parameters_multigroup CoxSnell
52 52 ger-arg age parameters_multigroup CoxSnell
53 53 usa-ger-arg no parameters_multigroup CoxSnell
56 56 ger-arg no parameters_multigroup CoxSnell
57 57 usa-ger-arg age parameters_promis CoxSnell
60 60 ger-arg age parameters_promis CoxSnell
61 61 usa-ger-arg no parameters_promis CoxSnell
100 100 ger-arg age parameters_multigroup CoxSnell
104 104 ger-arg no parameters_multigroup CoxSnell
108 108 ger-arg age parameters_promis CoxSnell
112 112 ger-arg no parameters_promis CoxSnell
hf_4_threshold k_flagged Flagged_items
49 0.02 4 PFM16-PFM33-PFM46-PFM51
52 0.02 4 PFM16-PFM33-PFM46-PFM51
53 0.02 4 PFM16-PFM33-PFM46-PFM51
56 0.02 4 PFM16-PFM33-PFM46-PFM51
57 0.02 4 PFM16-PFM33-PFM46-PFM51
60 0.02 4 PFM16-PFM33-PFM46-PFM51
61 0.02 4 PFM16-PFM33-PFM46-PFM51
100 0.03 4 PFM16-PFM33-PFM46-PFM51
104 0.03 4 PFM16-PFM33-PFM46-PFM51
108 0.03 4 PFM16-PFM33-PFM46-PFM51
112 0.03 4 PFM16-PFM33-PFM46-PFM51
Contains the flagging criteria used for the analysis
.id L.R. Chisq d.f. P deviance0 deviance1 deviance2
1 PFM1 56.03026 4 0.000000000019760970638 10645.302 6990.404 6937.285
2 PFM2 6.90108 4 0.141208956051862344339 9898.760 6619.378 6614.392
3 PFM3 39.14715 4 0.000000064955374634579 10785.813 7380.969 7342.296
4 PFM4 75.94335 4 0.000000000000001221245 11389.593 7808.620 7735.865
5 PFM6 31.87282 4 0.000002031076051567382 11023.740 7385.602 7353.888
6 PFM7 51.97948 4 0.000000000139315003977 10194.373 7192.477 7153.260
7 PFM9 43.73642 4 0.000000007277604852085 10036.315 6241.741 6200.286
8 PFM10 27.84421 4 0.000013413308095011622 11248.578 8977.046 8960.187
9 PFM12 124.58120 4 0.000000000000000000000 10727.383 7298.490 7180.574
10 PFM15 120.68602 4 0.000000000000000000000 11027.159 8910.277 8815.973
11 PFM16 364.19140 4 0.000000000000000000000 8994.933 6642.829 6294.930
12 PFM17 33.90021 4 0.000000781140144234804 11124.870 7590.174 7563.291
13 PFM18 60.14773 4 0.000000000002700728530 9817.997 6627.889 6576.583
14 PFM19 72.59587 4 0.000000000000006439294 9919.832 6760.236 6700.011
15 PFM21 132.11031 4 0.000000000000000000000 11346.321 7759.138 7630.416
16 PFM23 67.43895 4 0.000000000000078825835 8904.625 5743.857 5677.669
17 PFM25 29.87339 4 0.000005193675578940571 9240.539 6455.971 6427.620
18 PFM26 139.20435 4 0.000000000000000000000 10165.910 6777.850 6642.633
19 PFM27 52.58508 4 0.000000000104073416551 11374.372 8024.362 7976.822
20 PFM28 30.01959 4 0.000004849697878506198 9730.989 6623.398 6601.437
21 PFM29 86.80663 4 0.000000000000000000000 10171.450 6912.081 6869.422
22 PFM32 32.93848 4 0.000001229630285703998 11124.005 8135.671 8114.482
23 PFM33 473.24911 4 0.000000000000000000000 11304.037 8803.113 8330.224
24 PFM34 23.67025 4 0.000092997485350965192 9678.046 7280.193 7258.753
25 PFM35 29.36980 4 0.000006575232172179035 8713.630 6469.960 6443.884
26 PFM36 69.93531 4 0.000000000000023425706 11128.733 9510.728 9443.623
27 PFM37 10.99884 4 0.026577010923193089553 7919.682 5321.651 5316.079
28 PFM38 84.75311 4 0.000000000000000000000 11265.529 7593.337 7511.961
29 PFM40 105.10460 4 0.000000000000000000000 9023.564 6588.849 6484.657
30 PFM43 122.57914 4 0.000000000000000000000 10190.856 7736.000 7626.733
31 PFM44 102.45925 4 0.000000000000000000000 11510.613 7886.679 7788.616
32 PFM46 578.81480 4 0.000000000000000000000 11319.949 8358.331 7779.901
33 PFM49 36.04449 4 0.000000283334426143256 11314.915 7438.969 7409.988
34 PFM51 162.79579 4 0.000000000000000000000 11407.917 9184.887 9027.235
35 PFM53 64.59411 4 0.000000000000313304938 11313.846 8555.230 8500.066
deviance3 pseudo1.CoxSnell pseudo2.CoxSnell pseudo3.CoxSnell
1 6934.374 0.6375858 0.6428926 0.6431812
2 6612.477 0.5977520 0.5983086 0.5985222
3 7341.822 0.6115253 0.6156750 0.6157256
4 7732.676 0.6300690 0.6374680 0.6377890
5 7353.730 0.6358951 0.6390877 0.6391036
6 7140.498 0.5655300 0.5702360 0.5717564
7 6198.004 0.6513740 0.6553644 0.6555827
8 8949.202 0.4678372 0.4703229 0.4719362
9 7173.909 0.6141111 0.6265426 0.6272331
10 8789.591 0.4444849 0.4588440 0.4627942
11 6278.637 0.4796122 0.5275355 0.5296683
12 7556.274 0.6252842 0.6280712 0.6287953
13 6567.741 0.5876551 0.5934884 0.5944853
14 6687.640 0.5841464 0.5910434 0.5924460
15 7627.028 0.6307063 0.6436740 0.6440091
16 5676.418 0.5842816 0.5918529 0.5919946
17 6426.097 0.5385014 0.5421206 0.5423141
18 6638.646 0.6097104 0.6240940 0.6245100
19 7971.777 0.6055647 0.6107377 0.6112827
20 6593.378 0.5780971 0.5806623 0.5815996
21 6825.274 0.5955102 0.6002737 0.6051444
22 8102.733 0.5638906 0.5664492 0.5678615
23 8329.864 0.5006800 0.5621286 0.5621723
24 7256.522 0.4861816 0.4892317 0.4895480
25 6440.590 0.4637039 0.4675733 0.4680601
26 9440.793 0.3619386 0.3737189 0.3742109
27 5310.652 0.5139651 0.5147166 0.5154474
28 7508.584 0.6393221 0.6473814 0.6477119
29 6483.744 0.4914145 0.5059190 0.5060443
30 7613.421 0.4942511 0.5093668 0.5111772
31 7784.220 0.6344560 0.6442763 0.6447103
32 7779.516 0.5606432 0.6258407 0.6258807
33 7402.924 0.6591636 0.6618956 0.6625583
34 9022.091 0.4606210 0.4837256 0.4844625
35 8490.636 0.5351634 0.5422300 0.5434273
pseudo1.Nagelkerke pseudo2.Nagelkerke pseudo3.Nagelkerke pseudo1.McFadden
1 0.6725701 0.6428926 0.6431812 0.3433344
2 0.6386231 0.5983086 0.5985222 0.3312922
3 0.6437280 0.6156750 0.6157256 0.3156780
4 0.6579001 0.6374680 0.6377890 0.3144075
5 0.6671348 0.6390877 0.6391036 0.3300275
6 0.6009594 0.5702360 0.5717564 0.2944659
7 0.6941327 0.6553644 0.6555827 0.3780844
8 0.4893656 0.4703229 0.4719362 0.2019395
9 0.6470073 0.6265426 0.6272331 0.3196393
10 0.4662995 0.4588440 0.4627942 0.1919699
11 0.5226001 0.5275355 0.5296683 0.2614922
12 0.6551114 0.6280712 0.6287953 0.3177292
13 0.6288110 0.5934884 0.5944853 0.3249245
14 0.6238383 0.5910434 0.5924460 0.3185131
15 0.6589174 0.6436740 0.6440091 0.3161538
16 0.6381035 0.5918529 0.5919946 0.3549580
17 0.5833205 0.5421206 0.5423141 0.3013426
18 0.6482300 0.6240940 0.6245100 0.3332766
19 0.6324317 0.6107377 0.6112827 0.2945227
20 0.6196449 0.5806623 0.5815996 0.3193500
21 0.6330712 0.6002737 0.6051444 0.3204429
22 0.5907960 0.5664492 0.5678615 0.2686383
23 0.5233517 0.5621286 0.5621723 0.2212417
24 0.5216787 0.4892317 0.4895480 0.2477621
25 0.5089724 0.4675733 0.4680601 0.2574898
26 0.3791844 0.3737189 0.3742109 0.1453898
27 0.5780609 0.5147166 0.5154474 0.3280474
28 0.6685972 0.6473814 0.6477119 0.3259671
29 0.5350804 0.5059190 0.5060443 0.2698174
30 0.5252472 0.5093668 0.5111772 0.2408881
31 0.6615153 0.6442763 0.6447103 0.3148341
32 0.5859131 0.6258407 0.6258807 0.2616283
33 0.6889176 0.6618956 0.6625583 0.3425520
34 0.4808595 0.4837256 0.4844625 0.1948673
35 0.5593277 0.5422300 0.5434273 0.2438266
pseudo2.McFadden pseudo3.McFadden pseudo12.CoxSnell pseudo13.CoxSnell
1 0.3483243 0.3485978 0.0053 0.0056
2 0.3317959 0.3319894 0.0006 0.0008
3 0.3192635 0.3193075 0.0041 0.0042
4 0.3207953 0.3210753 0.0074 0.0077
5 0.3329044 0.3329188 0.0032 0.0032
6 0.2983129 0.2995648 0.0047 0.0062
7 0.3822149 0.3824422 0.0040 0.0042
8 0.2034383 0.2044148 0.0025 0.0041
9 0.3306314 0.3312527 0.0124 0.0131
10 0.2005218 0.2029143 0.0144 0.0183
11 0.3001694 0.3019807 0.0479 0.0501
12 0.3201457 0.3207765 0.0028 0.0035
13 0.3301502 0.3310508 0.0058 0.0068
14 0.3245842 0.3258314 0.0069 0.0083
15 0.3274987 0.3277973 0.0130 0.0133
16 0.3623910 0.3625314 0.0076 0.0077
17 0.3044107 0.3045755 0.0036 0.0038
18 0.3465776 0.3469698 0.0144 0.0148
19 0.2987022 0.2991458 0.0052 0.0057
20 0.3216068 0.3224349 0.0026 0.0035
21 0.3246369 0.3289772 0.0048 0.0096
22 0.2705431 0.2715993 0.0026 0.0040
23 0.2630754 0.2631072 0.0614 0.0615
24 0.2499774 0.2502079 0.0031 0.0034
25 0.2604823 0.2608603 0.0039 0.0044
26 0.1514197 0.1516740 0.0118 0.0123
27 0.3287509 0.3294362 0.0008 0.0015
28 0.3331905 0.3334903 0.0081 0.0084
29 0.2813640 0.2814652 0.0145 0.0146
30 0.2516102 0.2529164 0.0151 0.0169
31 0.3233535 0.3237354 0.0098 0.0103
32 0.3127265 0.3127605 0.0652 0.0652
33 0.3451132 0.3457375 0.0027 0.0034
34 0.2086868 0.2091377 0.0231 0.0238
35 0.2487023 0.2495359 0.0071 0.0083
pseudo23.CoxSnell pseudo12.Nagelkerke pseudo13.Nagelkerke
1 0.0003 -0.0297 -0.0294
2 0.0002 -0.0403 -0.0401
3 0.0001 -0.0281 -0.0280
4 0.0003 -0.0204 -0.0201
5 0.0000 -0.0280 -0.0280
6 0.0015 -0.0307 -0.0292
7 0.0002 -0.0388 -0.0386
8 0.0016 -0.0190 -0.0174
9 0.0007 -0.0205 -0.0198
10 0.0040 -0.0075 -0.0035
11 0.0021 0.0049 0.0071
12 0.0007 -0.0270 -0.0263
13 0.0010 -0.0353 -0.0343
14 0.0014 -0.0328 -0.0314
15 0.0003 -0.0152 -0.0149
16 0.0001 -0.0463 -0.0461
17 0.0002 -0.0412 -0.0410
18 0.0004 -0.0241 -0.0237
19 0.0005 -0.0217 -0.0211
20 0.0009 -0.0390 -0.0380
21 0.0049 -0.0328 -0.0279
22 0.0014 -0.0243 -0.0229
23 0.0000 0.0388 0.0388
24 0.0003 -0.0324 -0.0321
25 0.0005 -0.0414 -0.0409
26 0.0005 -0.0055 -0.0050
27 0.0007 -0.0633 -0.0626
28 0.0003 -0.0212 -0.0209
29 0.0001 -0.0292 -0.0290
30 0.0018 -0.0159 -0.0141
31 0.0004 -0.0172 -0.0168
32 0.0000 0.0399 0.0400
33 0.0007 -0.0270 -0.0264
34 0.0007 0.0029 0.0036
35 0.0012 -0.0171 -0.0159
pseudo23.Nagelkerke pseudo12.McFadden pseudo13.McFadden pseudo23.McFadden
1 0.0003 0.0050 0.0053 0.0003
2 0.0002 0.0005 0.0007 0.0002
3 0.0001 0.0036 0.0036 0.0000
4 0.0003 0.0064 0.0067 0.0003
5 0.0000 0.0029 0.0029 0.0000
6 0.0015 0.0038 0.0051 0.0013
7 0.0002 0.0041 0.0044 0.0002
8 0.0016 0.0015 0.0025 0.0010
9 0.0007 0.0110 0.0116 0.0006
10 0.0040 0.0086 0.0109 0.0024
11 0.0021 0.0387 0.0405 0.0018
12 0.0007 0.0024 0.0030 0.0006
13 0.0010 0.0052 0.0061 0.0009
14 0.0014 0.0061 0.0073 0.0012
15 0.0003 0.0113 0.0116 0.0003
16 0.0001 0.0074 0.0076 0.0001
17 0.0002 0.0031 0.0032 0.0002
18 0.0004 0.0133 0.0137 0.0004
19 0.0005 0.0042 0.0046 0.0004
20 0.0009 0.0023 0.0031 0.0008
21 0.0049 0.0042 0.0085 0.0043
22 0.0014 0.0019 0.0030 0.0011
23 0.0000 0.0418 0.0419 0.0000
24 0.0003 0.0022 0.0024 0.0002
25 0.0005 0.0030 0.0034 0.0004
26 0.0005 0.0060 0.0063 0.0003
27 0.0007 0.0007 0.0014 0.0007
28 0.0003 0.0072 0.0075 0.0003
29 0.0001 0.0115 0.0116 0.0001
30 0.0018 0.0107 0.0120 0.0013
31 0.0004 0.0085 0.0089 0.0004
32 0.0000 0.0511 0.0511 0.0000
33 0.0007 0.0026 0.0032 0.0006
34 0.0007 0.0138 0.0143 0.0005
35 0.0012 0.0049 0.0057 0.0008
beta12 df12 df13 df23 chi12 chi13 chi23
1 0.0231 2 4 2 0.0000 0.0000 0.2333
2 0.0059 2 4 2 0.0827 0.1412 0.3838
3 0.0102 2 4 2 0.0000 0.0000 0.7890
4 0.0106 2 4 2 0.0000 0.0000 0.2030
5 0.0037 2 4 2 0.0000 0.0000 0.9237
6 0.0022 2 4 2 0.0000 0.0000 0.0017
7 0.0086 2 4 2 0.0000 0.0000 0.3196
8 0.0168 2 4 2 0.0002 0.0000 0.0041
9 0.0567 2 4 2 0.0000 0.0000 0.0357
10 0.0426 2 4 2 0.0000 0.0000 0.0000
11 0.1315 2 4 2 0.0000 0.0000 0.0003
12 0.0184 2 4 2 0.0000 0.0000 0.0299
13 0.0143 2 4 2 0.0000 0.0000 0.0120
14 0.0131 2 4 2 0.0000 0.0000 0.0021
15 0.0044 2 4 2 0.0000 0.0000 0.1838
16 0.0149 2 4 2 0.0000 0.0000 0.5350
17 0.0165 2 4 2 0.0000 0.0000 0.4671
18 0.0305 2 4 2 0.0000 0.0000 0.1362
19 0.0027 2 4 2 0.0000 0.0000 0.0802
20 0.0223 2 4 2 0.0000 0.0000 0.0178
21 0.0032 2 4 2 0.0000 0.0000 0.0000
22 0.0101 2 4 2 0.0000 0.0000 0.0028
23 0.1394 2 4 2 0.0000 0.0000 0.8355
24 0.0178 2 4 2 0.0000 0.0001 0.3278
25 0.0228 2 4 2 0.0000 0.0000 0.1926
26 0.0116 2 4 2 0.0000 0.0000 0.2429
27 0.0047 2 4 2 0.0617 0.0266 0.0663
28 0.0381 2 4 2 0.0000 0.0000 0.1848
29 0.0714 2 4 2 0.0000 0.0000 0.6334
30 0.0056 2 4 2 0.0000 0.0000 0.0013
31 0.0391 2 4 2 0.0000 0.0000 0.1110
32 0.1876 2 4 2 0.0000 0.0000 0.8248
33 0.0148 2 4 2 0.0000 0.0000 0.0292
34 0.0335 2 4 2 0.0000 0.0000 0.0764
35 0.0036 2 4 2 0.0000 0.0000 0.0090
Takes a specification number and creates Bland-Altman plots to show the difference between thetas for various model comparisons:
# Create a Bland-Altman plot of person T-scores for one specification in the
# global `out` list.
#
# specification_number: index into `out`.
# comparison: one of "baseline-purified", "baseline-corrected",
#             "corrected-purified" — selects which pair of theta vectors
#             (rescaled to T-scores via theta * 10 + 50) is compared.
# Returns a ggplot object; points are coloured by country, with dashed lines
# at the mean difference and at mean ± 1.96 SD (limits of agreement).
#
# NOTE(review): there is no else/error branch — any other `comparison` value
# leaves `df` undefined and the function fails at mean(df$diff). Consider
# match.arg() for validation.
plot_bland_altman <- function(specification_number, comparison) {
title <- paste0("Flagged DIF Items: ", paste0(out[[specification_number]]$analysis_results$`last iteration`$Flagged_items, collapse = ", "))
# theta for constrained = baseline
t_baseline <- out[[specification_number]]$theta_baseline * 10 + 50
# theta for corrected model
t_corrected <- out[[specification_number]]$theta_corrected * 10 + 50
# theta for purified model
t_purified <- out[[specification_number]]$theta_purified * 10 + 50
# Build the comparison-specific data frame (per-person difference, mean of
# both methods, and country label)
if (comparison == "baseline-purified") { # compares baseline model (equal item parameters across countries) with purified model = without DIF items
title1 = "Bland-Altman Plot: Baseline vs Purified Model"
# create a data frame with the two variables to compare
df <- data.frame(method1 = as.numeric(t_baseline),
method2 = as.numeric(t_purified)) %>%
mutate(
diff = method1 - method2,
mean_methods = (method1 + method2) / 2,
Country = dplyr::recode(out[[specification_number]]$analysis_results$`last iteration`$DIF_model$data$country,
"arg" = "Argentina",
"ger" = "Germany",
"usa" = "USA"))
# create a Bland-Altman plot using ggplot2
} else if (comparison == "baseline-corrected") { # compares baseline model (equal item parameters across countries) with corrected model = freely estimated parameters for flagged items
title1 = "Bland-Altman Plot: Baseline vs Corrected Model"
# create a data frame with the two variables to compare
df <- data.frame(method1 = as.numeric(t_baseline),
method2 = as.numeric(t_corrected)) %>%
mutate(
diff = method1 - method2,
mean_methods = (method1 + method2) / 2,
Country = dplyr::recode(out[[specification_number]]$analysis_results$`last iteration`$DIF_model$data$country,
"arg" = "Argentina",
"ger" = "Germany",
"usa" = "USA"))
} else if (comparison == "corrected-purified") { # compares purified model = without DIF items with corrected model = freely estimated parameters for flagged items
title1 = "Bland-Altman Plot: Corrected vs Purified Model"
# create a data frame with the two variables to compare
df <- data.frame(method1 = as.numeric(t_corrected),
method2 = as.numeric(t_purified)) %>%
mutate(
diff = method1 - method2,
mean_methods = (method1 + method2) / 2,
Country = dplyr::recode(out[[specification_number]]$analysis_results$`last iteration`$DIF_model$data$country,
"arg" = "Argentina",
"ger" = "Germany",
"usa" = "USA"))
}
# Mean and SD of the differences define the limits of agreement
mean_diff <- mean(df$diff)
sd_diff <- sd(df$diff)
# Define colors for each group
group_colors <- c("USA" = "black", "Germany" = "orange", "Argentina" = "purple")
# NOTE(review): the geom_label() calls below put constant labels inside aes(),
# which draws one (overplotted) label per row; consider annotate() instead.
bland_altman <- ggplot(df, aes(x = mean_methods,
y = diff,
color = Country)) +
geom_point(shape = 1) +
scale_color_manual(values = group_colors) +
geom_hline(yintercept = mean_diff,
linetype = "dashed",
color = "black", size = 1.4) +
geom_label(aes(label = paste0("Mean: ", sprintf("%.2f", mean_diff)),
x = 30,
y = mean_diff + mean_diff *1), # offset the label by 100% of the mean for spacing
hjust = 1, vjust = 0, color = "black") +
geom_hline(yintercept = mean_diff + 1.96 * sd_diff,
linetype = "dashed",
color = "blue", size = 1.4 ) +
geom_label(aes(label = paste0("+1.96SD: ", sprintf("%.2f", (mean_diff + 1.96 * sd_diff))),
x = 30,
y = mean_diff + 1.96 * sd_diff + (mean_diff + 1.96 * sd_diff)*.1), # add 10% of the limit for spacing
hjust = 1, vjust = 0, color = "blue") +
geom_hline(yintercept = mean_diff - 1.96 * sd_diff,
linetype = "dashed",
color = "blue", size = 1.4) +
geom_label(aes(label = paste0("-1.96SD: ", sprintf("%.2f", (mean_diff - 1.96 * sd_diff))),
x = 30,
y = mean_diff - 1.96 * sd_diff + (mean_diff - 1.96 * sd_diff)*.1),
hjust = 1, vjust = 0, color = "blue") +
labs(x = "Average of both T-Scores", y = "Difference between T-Scores",
title = title1,
subtitle = title,
size = 1) +
theme_bw() +
#scale_colour_brewer(palette = "Dark2") +
#scale_fill_brewer(palette = "Dark2") +
scale_x_continuous(breaks = c(20, 30, 40, 50, 60, 70, 80),
limits = c(20, 80)) +
scale_y_continuous(breaks = c(-2, -1, 0, 1, 2))
# Return the ggplot object
bland_altman
}Very minor differences between thetas, when the baseline model (constrained item parameters for all groups) vs corrected model (flagged item paremeters estimated freely).
# Bland-Altman plot: baseline vs corrected thetas for the selected specification
bland_altman_baseline_corrected <- plot_bland_altman(spec_promis, comparison = "baseline-corrected")
bland_altman_baseline_corrected
When comparing the Baseline model with the purified model, we observe large differences, especially for Argentina. For thetas at -2, we find large positive deviations, and for thetas at +2 large negative deviations in Theta.
# Bland-Altman plot: baseline vs purified thetas for the selected specification
bland_altman_baseline_purified <- plot_bland_altman(spec_promis, comparison = "baseline-purified")
bland_altman_baseline_purified
When comparing the Baseline model with the purified model, we observe large differences, especially for Argentina. For thetas at -2, we find large positive deviations, and for thetas at +2 large negative deviations in Theta.
# Bland-Altman plot: corrected vs purified thetas for the selected specification
bland_altman_corrected_purified <- plot_bland_altman(spec_promis, comparison = "corrected-purified")
bland_altman_corrected_purified
# BUG FIX: the print of the plot object and the cowplot call were fused into a
# single token ("bland_altman_corrected_purifiedcowplot::plot_grid"), which is
# not valid code; they are split here. Stack all three plots vertically.
cowplot::plot_grid(bland_altman_baseline_corrected,
                   bland_altman_baseline_purified,
                   bland_altman_corrected_purified,
                   ncol = 1, align = "v")
# Define a range of theta values and the corresponding T-scores
thetas <- seq(-4, 4, by = 0.1)
t_scores <- thetas * 10 + 50
# Country as a factor with USA first so it is the reference group.
# NOTE(review): this `group` object does not appear to be used below — the
# lapply() builds its own `group` column. Confirm whether it is still needed.
group <- factor(data_complete$country,
levels = c("usa", "ger", "arg")) # usa should always be reference
#get t scores and model for baseline
t_baseline <- out[[spec_promis]]$theta_baseline * 10 + 50
baseline_model <- out[[spec_promis]]$results_baseline$model
#get t scores and model for corrected model
t_corrected <- out[[spec_promis]]$theta_corrected * 10 + 50
corrected_model <- out[[spec_promis]]$analysis_results$`last iteration`$corrected_model$model
# Prepare the data: test information at each theta for every country, under
# both the baseline and the corrected model
data_list <- lapply(c("usa", "ger", "arg"), function(g) {
data.frame(
theta = thetas,
test_info_baseline = testinfo(baseline_model, thetas, group = g),
test_info_corrected = testinfo(corrected_model, thetas, group = g),
group = g
)
})
data <- dplyr::bind_rows(data_list)
# Colour palette for the corrected-model curves. Keys must match the recoded
# group labels ("USA"/"Germany"/"Argentina"); the previous lowercase keys
# ("usa"/"ger"/"arg") could never match after the case_match() recode below.
colors <- c(USA = "black", Germany = "orange", Argentina = "purple", Baseline = "blue")
# Test information curves: dashed coloured lines = corrected model per country,
# solid black line = baseline model (drawn with a fixed colour on purpose).
tif_plot_overlapped <- data %>%
  mutate(t = theta * 10 + 50,
         group = case_match(group,
                            "usa" ~ "USA",
                            "ger" ~ "Germany",
                            "arg" ~ "Argentina")) %>%
  ggplot() +
  geom_line(aes(x = t, y = test_info_corrected, color = group), linetype = "dashed", size = .5, alpha = .6) +
  # the fixed colour deliberately overrides the aes() mapping: one black baseline curve
  geom_line(aes(x = t, y = test_info_baseline, color = group), size = .5, alpha = .6, color = "black") +
  # FIX: scale_colour_brewer()/scale_fill_brewer() that used to follow replaced
  # this manual scale (ggplot2 warned "Scale for colour is already present");
  # they were removed so the manual palette actually applies.
  scale_color_manual(values = colors) +
  labs(x = 'T-scores',
       y = 'Test Information',
       title = 'Test Information Curves for Each Group',
       color = 'Group') +
  theme_classic() +
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1), # black border, no fill
    panel.background = element_blank(), # remove panel background
    axis.line = element_line(color = "black"), # add axis lines
    legend.position = c(0.95, 0.95), # position the legend inside the plot area
    legend.justification = c("right", "top"), # anchor the legend at its top-right corner
    legend.background = element_blank(), # remove legend background
    legend.box.background = element_blank(), # remove legend box background
    legend.box.margin = margin(0, 0, 0, 0) # remove margin around the legend box
  )
Adding another scale for colour, which will replace the existing scale.
# Define colors for each group (plus the shared baseline curve)
group_colors <- c("USA" = "black", "Germany" = "orange", "Argentina" = "purple", "Baseline" = "blue")
# Long format: one row per (theta, country, model) with a unique_id that
# collapses every baseline row to the single label "Baseline"
data_long <- data %>%
pivot_longer(
cols = c(test_info_baseline, test_info_corrected),
names_to = "model_type",
values_to = "test_info"
) %>%
mutate(
t = theta * 10 + 50,
group = case_match(group,
"usa" ~ "USA",
"ger" ~ "Germany",
"arg" ~ "Argentina"),
model_type = case_match(model_type,
"test_info_baseline" ~ "Baseline",
"test_info_corrected" ~ "Corrected"),
unique_id = if_else(model_type == "Baseline", "Baseline", paste(group, model_type, sep = " "))
) # Sample colors from the Viridis palette for the unique IDs excluding 'Baseline'
unique_ids <- unique(data_long$unique_id)
num_unique_ids <- length(unique_ids) - 1 # excluding 'Baseline'
# NOTE(review): num_unique_ids and selected_colors appear unused below.
selected_colors <- group_colors
# Set the names of your selected colors to match the unique IDs (excluding 'Baseline')
# NOTE(review): group_colors has 4 elements but there are presumably only 3
# non-Baseline unique IDs, so setNames() leaves the last colour with an NA
# name — verify this still maps as intended.
viridis_colors <- setNames(group_colors, unique_ids[unique_ids != "Baseline"])
# Create a custom color vector, adding 'blue' for 'Baseline'
line_colors <- c("Baseline" = "blue", viridis_colors)
# TIF plot: one line per country/model combination; baseline solid, corrected dashed
tif_plot <- ggplot(data_long, aes(x = t, y = test_info, color = unique_id)) +
geom_line(aes(linetype = model_type), size = .5, alpha = 0.7) +
scale_color_manual(values = line_colors) +
scale_linetype_manual(values = c("Baseline" = "solid", "Corrected" = "dashed")) +
labs(
x = 'T-scores',
y = 'Test Information',
title = 'Test Information Function Curves for Each Country/Model Combination',
color = "Country/Model"
) +
guides(
linetype = FALSE # Remove linetype legend; NOTE(review): `FALSE` is deprecated in ggplot2, prefer "none"
) +
theme_classic() +
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 1), # black border, no fill
panel.background = element_blank(), # remove panel background
axis.line = element_line(color = "black"), # add axis lines
legend.position = c(.999, .999), # position the legend inside the plot area
legend.justification = c("right", "top"), # anchor the legend at its top-right corner
legend.background = element_rect(color = "black", fill = "white", size = 0.5), # add a box around the legend with black border
legend.box.background = element_blank(), # remove legend box background
legend.box.margin = margin(0, 0, 0, 0) # remove margin around the legend box
)
tif_plot# Create data with theta estimates
# Build a long data set pairing each person's T-score with the expected test
# score (ETS) under the matching model (baseline with baseline, corrected with
# corrected), plus the observed sum score and readable country labels.
data_thetas_ets <- data_complete %>% mutate(
t_baseline = fscores(baseline_model)*10+50,
t_corrected = fscores(corrected_model)*10+50,
# NOTE(review): group = 1 presumably selects the first (reference) group's
# parameters in mirt::expected.test — confirm this is intended for all rows.
ets_baseline = mirt::expected.test(baseline_model,
Theta = fscores(baseline_model),
group = 1),
ets_corrected = mirt::expected.test(corrected_model,
Theta = fscores(corrected_model),
group = 1)
) %>%
rowwise() %>%
# Observed sum score across all PF items
dplyr::mutate(sum_score = sum(c_across(PFM1:PFM53))) %>%
ungroup() %>%
pivot_longer(cols = c(ets_baseline, ets_corrected),
names_to = "ets_type",
values_to = "ets") %>%
pivot_longer(cols = c(t_baseline, t_corrected),
names_to = "t_type",
values_to = "t") %>%
relocate(sum_score,
t_type,
t,
ets_type,
ets
) %>%
# Keep only matched pairs (baseline ETS with baseline theta, etc.); the double
# pivot_longer above creates all 4 combinations
filter(ets_type =="ets_baseline" & t_type == "t_baseline" |
ets_type =="ets_corrected" & t_type == "t_corrected" ) %>%
mutate(group = ifelse(ets_type =="ets_baseline", "baseline", "corrected"),
`Model Type` = ifelse(t_type =="t_baseline", "baseline", "corrected"),
country = case_match(country,
"usa" ~ "USA",
"ger" ~ "Germany",
"arg" ~ "Argentina")
)
# Test characteristic curves (TCC): expected test score against T-score,
# one panel per country, comparing the baseline and DIF-corrected models.
tcc_plot <- data_thetas_ets %>%
  ggplot(aes(x = t,
             y = ets,
             color = `Model Type`)) +
  geom_line(aes(linetype = `Model Type`),
            linewidth = 1,  # `size` for lines is deprecated since ggplot2 3.4; file already uses `linewidth`
            alpha = .5) + # Thicker lines
  scale_linetype_manual(values = c("solid", "solid")) + # same linetype for both; series distinguished by colour
  theme_minimal() +
  labs(
    x = 'T-scores',
    y = 'Expected Test Score',
    title = 'Test Characteristic Curves for Each Country/Model Combination'
  ) +
  scale_colour_brewer(palette = "Dark2") +
  facet_wrap(~country) +
  theme_classic() + # NOTE: overrides theme_minimal() set above
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1), # black border, no fill
    panel.background = element_blank(), # remove panel background
    axis.line = element_line(color = "black"), # add axis lines
    # legend.position = c(.999, .999), # position the legend inside the plot area
    legend.justification = c("right", "top"), # anchor the legend at its top-right corner
    legend.background = element_rect(color = "black", fill = "white", linewidth = 0.5), # box around the legend
    legend.box.background = element_blank(), # remove legend box background
    legend.box.margin = margin(0, 0, 0, 0) # remove margin around the legend box
  )
# Render the TCC plot (the print and the following assignment were fused
# into one broken identifier during document extraction).
tcc_plot

# PROMIS Physical Function items used throughout the analysis.
pf_items <- c("PFM1", "PFM2", "PFM3", "PFM4", "PFM6", "PFM7", "PFM9", "PFM10",
              "PFM12", "PFM15", "PFM16", "PFM17", "PFM18", "PFM19", "PFM21",
              "PFM23", "PFM25", "PFM26", "PFM27", "PFM28", "PFM29", "PFM32",
              "PFM33", "PFM34", "PFM35", "PFM36", "PFM37", "PFM38", "PFM40",
              "PFM43", "PFM44", "PFM46", "PFM49", "PFM51", "PFM53")
# Person-level scores for effect-size comparisons: corrected and purified
# T-scores (theta * 10 + 50) from the selected specification `spec_promis`,
# plus the baseline PROMIS T-score and the raw corrected theta.
data_es <- data_complete %>%
  mutate(t_corrected = as.numeric(out[[spec_promis]]$theta_corrected * 10 + 50),
         theta_corrected = as.numeric(out[[spec_promis]]$theta_corrected),
         t_purified = as.numeric(out[[spec_promis]]$theta_purified * 10 + 50),
         t_promis = as.numeric(t_promis),
         country = case_match(country,
                              "arg" ~ "Argentina",
                              "usa" ~ "USA",
                              "ger" ~ "Germany")) %>%
  dplyr::select(
    country,
    t_promis,
    t_purified,
    t_corrected,
    theta_corrected,
    country_vector,
    age,
    all_of(pf_items)  # all_of(): bare external vectors in select() are deprecated tidyselect usage
  )
# USA vs. Germany: Cohen's d and Welch t-test on corrected T-scores.
data_es_usa_ger <- data_es %>%
  filter(country == "USA" | country == "Germany")
d_usa_ger <- effectsize::cohens_d(
  t_corrected ~ country,
  data = data_es_usa_ger
)
t.test(t_corrected ~ country, data = data_es_usa_ger)
Welch Two Sample t-test
data: t_corrected by country
t = 1.4597, df = 2432.3, p-value = 0.1445
alternative hypothesis: true difference in means between group Germany and group USA is not equal to 0
95 percent confidence interval:
-0.2317246 1.5813838
sample estimates:
mean in group Germany mean in group USA
51.13355 50.45872
# USA vs. Argentina: Cohen's d and Welch t-test on corrected T-scores.
data_es_usa_arg <- data_es %>%
  filter(country == "USA" | country == "Argentina")
d_usa_arg <- effectsize::cohens_d(
  t_corrected ~ country,
  data = data_es_usa_arg
)
t.test(t_corrected ~ country, data = data_es_usa_arg)
Welch Two Sample t-test
data: t_corrected by country
t = 6.9836, df = 2592.1, p-value = 0.000000000003643
alternative hypothesis: true difference in means between group Argentina and group USA is not equal to 0
95 percent confidence interval:
2.028106 3.611663
sample estimates:
mean in group Argentina mean in group USA
53.27860 50.45872
# Germany vs. Argentina: Cohen's d and Welch t-test on corrected T-scores.
data_es_ger_arg <- data_es %>%
  filter(country == "Germany" | country == "Argentina")
d_ger_arg <- effectsize::cohens_d(
  t_corrected ~ country,
  data = data_es_ger_arg
)
t.test(t_corrected ~ country, data = data_es_ger_arg)
Welch Two Sample t-test
data: t_corrected by country
t = 5.2341, df = 1831.7, p-value = 0.0000001848
alternative hypothesis: true difference in means between group Argentina and group Germany is not equal to 0
95 percent confidence interval:
1.341285 2.948824
sample estimates:
mean in group Argentina mean in group Germany
53.27860 51.13355
# Country-level means of corrected and baseline T-scores and corrected theta.
data_es %>%
group_by(country) %>%
summarize(mean_corrected = mean(t_corrected),
mean_baseline = mean(t_promis),
mean_theta = mean(theta_corrected))# A tibble: 3 Ɨ 4 (console output follows)
country mean_corrected mean_baseline mean_theta
<chr> <dbl> <dbl> <dbl>
1 Argentina 53.3 50.4 0.328
2 Germany 51.1 49.2 0.113
3 USA 50.5 48.1 0.0459
# A tibble: 3 Ɨ 2
country mean_theta
<chr> <dbl>
1 Argentina 0.328
2 Germany 0.113
3 USA 0.0459
Cohen's d | 95% CI
-------------------------
0.06 | [-0.02, 0.14]
- Estimated using pooled SD.
Cohen's d | 95% CI
------------------------
0.25 | [0.17, 0.33]
- Estimated using pooled SD.
Cohen's d | 95% CI
------------------------
0.23 | [0.15, 0.32]
- Estimated using pooled SD.
# Save the current palette so it can be restored later via palette(oldPalette).
oldPalette <- palette()
# Define a color-friendly palette
newPalette <- c("black", "orange", "purple")
# Set the new palette (this line and the following assignment were fused
# into one line during document extraction).
palette(newPalette)

# Visualization data: baseline, purified, and corrected T-scores per person.
data_viz <- data_complete %>%
  mutate(t_corrected = as.numeric(out[[spec_promis]]$theta_corrected * 10 + 50),
         t_purified = as.numeric(out[[spec_promis]]$theta_purified * 10 + 50),
         t_promis = as.numeric(t_promis),
         country = case_match(country,
                              "arg" ~ "Argentina",
                              "usa" ~ "USA",
                              "ger" ~ "Germany")) %>%
  dplyr::select(
    country,
    t_promis,
    t_purified,
    t_corrected,
    country_vector,
    age,
    all_of(pf_items)  # all_of(): bare external vectors in select() are deprecated tidyselect usage
  )
# Long format: one row per person and scoring version, with readable
# version labels for plotting.
data_viz_long <- data_viz %>%
  pivot_longer(cols = c(t_promis, t_purified, t_corrected),
               names_to = "version") %>%
  mutate(version = case_match(version,
                              "t_promis" ~ "Baseline",
                              "t_purified" ~ "Purified",
                              "t_corrected" ~ "Corrected"))

# Observed range of baseline T-scores (used to pad the x-axis limits).
data_range <- range(data_viz$t_promis, na.rm = TRUE)
# Density of T-scores for every country/version combination, faceted by country.
data_viz_long %>%
  ggplot(aes(x = value,
             color = interaction(country, version, sep = "-"),
             linetype = interaction(country, version, sep = "-"))) +
  geom_density(alpha = 0.6, adjust = 1) + # Adjust parameter controls smoothness
  scale_color_manual(values = c(
    "USA-Baseline" = "black",
    "Argentina-Baseline" = "purple",
    "Germany-Baseline" = "orange",
    "USA-Corrected" = "grey",
    "Argentina-Corrected" = "purple",
    "Germany-Corrected" = "orange",
    "USA-Purified" = "darkgrey",
    "Argentina-Purified" = "purple",
    "Germany-Purified" = "orange"
  )) +
  scale_linetype_manual(values = c(
    "USA-Baseline" = "solid",
    "Argentina-Baseline" = "solid",
    "Germany-Baseline" = "solid",
    "USA-Corrected" = "dashed",
    "Argentina-Corrected" = "dashed",
    "Germany-Corrected" = "dashed",
    "USA-Purified" = "dotted",
    "Argentina-Purified" = "dotted",
    # Fixed: this entry was a duplicate "Germany-Corrected" = "dotted"; the
    # duplicate name was ignored (first match wins), leaving "Germany-Purified"
    # with no linetype mapping at all.
    "Germany-Purified" = "dotted"
  )) +
  scale_x_continuous(breaks = c(20, 30, 40, 50, 60, 70, 80),
                     limits = c(data_range[1] - 10, data_range[2] + 10)) +
  labs(
    x = 'T-scores',
    y = 'Density',
    title = 'Distribution of PROMIS Physical Function T-Scores',
    color = "Country and Model",
    linetype = "Country and Model"
  ) +
  theme_classic() +
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
    panel.background = element_blank(),
    axis.line = element_line(color = "black"),
    legend.position = c(.999, .999),
    legend.justification = c("right", "top"),
    legend.background = element_rect(color = "black", fill = "white", size = 0.5),
    legend.box.background = element_blank(),
    legend.box.margin = margin(0, 0, 0, 0)
  ) +
  facet_wrap(~country)

t_score_distribution_baseline_corrected <- data_viz_long %>%
# Combined (unfaceted) density plot of baseline vs. corrected T-scores,
# colored by country with linetype distinguishing the model version.
ggplot(aes(x = value, color = interaction(country, version, sep = "-"), linetype = interaction(country, version, sep = "-"))) +
geom_density(alpha = 0.6, adjust = 1) + # Adjust parameter controls smoothness
# NOTE(review): only Baseline/Corrected combinations are mapped below, but
# data_viz_long also contains "Purified" rows — confirm those are meant to be
# left unmapped (they will fall back to the scale's na.value) rather than
# filtered out upstream.
scale_color_manual(values = c(
"USA-Baseline" = "black",
"Argentina-Baseline" = "purple",
"Germany-Baseline" = "orange",
"USA-Corrected" = "black",
"Argentina-Corrected" = "purple",
"Germany-Corrected" = "orange"
)) +
scale_linetype_manual(values = c(
"USA-Baseline" = "solid",
"Argentina-Baseline" = "solid",
"Germany-Baseline" = "solid",
"USA-Corrected" = "dashed",
"Argentina-Corrected" = "dashed",
"Germany-Corrected" = "dashed"
)) +
# Pad the x-axis by 10 T-score points beyond the observed baseline range.
scale_x_continuous(breaks = c(20, 30, 40, 50, 60, 70, 80), limits = c(data_range[1] - 10, data_range[2] + 10)) +
labs(
x = 'T-scores',
y = 'Density',
title = 'Distribution of PROMIS Physical Function T-Scores',
color = "Country and Model",
linetype = "Country and Model"
) +
theme_classic() +
theme(
panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
panel.background = element_blank(),
axis.line = element_line(color = "black"),
legend.position = c(.999, .999), # legend anchored inside the top-right of the panel
legend.justification = c("right", "top"),
legend.background = element_rect(color = "black", fill = "white", size = 0.5),
legend.box.background = element_blank(),
legend.box.margin = margin(0, 0, 0, 0)
)
# Render the combined baseline/corrected density plot.
t_score_distribution_baseline_corrected

# Calculate the median baseline T-score for each country
medians <- data_viz %>%
  group_by(country) %>%
  dplyr::summarize(median_t_promis = median(t_promis))

# Density height at each country's median (used to draw the median segments).
# Build one row per country and bind once — avoids growing a data.frame with
# rbind() inside a loop, which copies the accumulator on every iteration.
density_at_medians <- do.call(rbind, lapply(unique(data_viz$country), function(country_name) {
  data_country <- data_viz[data_viz$country == country_name, ]
  density_obj <- density(data_country$t_promis)
  median_val <- medians$median_t_promis[medians$country == country_name]
  median_density <- approx(density_obj$x, density_obj$y, xout = median_val)$y
  data.frame(country = country_name,
             median_t_promis = median_val,
             density = median_density)
}))
# X-axis range: observed baseline T-scores, padded by 10 points each side below.
data_range <- range(data_viz$t_promis, na.rm = TRUE)

# Baseline T-score densities per country, with dashed vertical segments
# marking each country's median (heights taken from density_at_medians).
distribution_plot_baseline <- ggplot(data_viz, aes(x = t_promis, group = country)) +
  geom_density(aes(color = country, linetype = country), alpha = 0.6, adjust = 1) +
  geom_segment(data = density_at_medians,
               aes(x = median_t_promis, xend = median_t_promis,
                   y = 0, yend = density, color = country),
               linetype = "dashed") +
  scale_color_manual(values = c("USA" = "black", "Argentina" = "purple", "Germany" = "orange")) +
  scale_linetype_manual(values = c("USA" = "solid", "Argentina" = "dotted", "Germany" = "dashed")) +
  viridis::scale_fill_viridis(discrete = TRUE) +
  scale_x_continuous(breaks = c(20, 30, 40, 50, 60, 70, 80),
                     limits = c(data_range[1] - 10, data_range[2] + 10)) +
  labs(
    x = 'T-scores',
    y = 'Density',
    title = 'Distribution of PROMIS Physical Function T-Scores',
    color = "Country",
    linetype = "Country"
  ) +
  guides(
    #linetype = guide_legend(title = NULL) # Uncomment this to remove linetype legend title
  ) +
  theme_classic() +
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
    panel.background = element_blank(),
    axis.line = element_line(color = "black"),
    legend.position = c(.999, .999),
    legend.justification = c("right", "top"),
    legend.background = element_rect(color = "black", fill = "white", size = 0.5),
    legend.box.background = element_blank(),
    legend.box.margin = margin(0, 0, 0, 0)
  )
### Corrected
# Calculate the median corrected T-score for each country
medians <- data_viz %>%
  group_by(country) %>%
  dplyr::summarize(median_t_corrected = median(t_corrected))

# Density height at each country's median corrected score. One row per
# country, bound once — avoids growing a data.frame with rbind() in a loop.
density_at_medians <- do.call(rbind, lapply(unique(data_viz$country), function(country_name) {
  data_country <- data_viz[data_viz$country == country_name, ]
  density_obj <- density(data_country$t_corrected)
  median_val <- medians$median_t_corrected[medians$country == country_name]
  median_density <- approx(density_obj$x, density_obj$y, xout = median_val)$y
  data.frame(country = country_name,
             median_t_corrected = median_val,
             density = median_density)
}))
# X-axis range: observed corrected T-scores, padded by 10 points each side below.
data_range <- range(data_viz$t_corrected, na.rm = TRUE)

# Corrected T-score densities per country, with dashed vertical segments
# marking each country's median (heights taken from density_at_medians).
distribution_plot_corrected <- ggplot(data_viz, aes(x = t_corrected, group = country)) +
  geom_density(aes(color = country, linetype = country), alpha = 0.6, adjust = 1) +
  geom_segment(data = density_at_medians,
               aes(x = median_t_corrected, xend = median_t_corrected,
                   y = 0, yend = density, color = country),
               linetype = "dashed") +
  scale_color_manual(values = c("USA" = "black", "Argentina" = "purple", "Germany" = "orange")) +
  scale_linetype_manual(values = c("USA" = "solid", "Argentina" = "dotted", "Germany" = "dashed")) +
  viridis::scale_fill_viridis(discrete = TRUE) +
  scale_x_continuous(breaks = c(20, 30, 40, 50, 60, 70, 80),
                     limits = c(data_range[1] - 10, data_range[2] + 10)) +
  labs(
    x = 'T-scores',
    y = 'Density',
    title = 'Distribution of PROMIS Physical Function T-Scores',
    color = "Country",
    linetype = "Country"
  ) +
  guides(
    #linetype = guide_legend(title = NULL) # Uncomment this to remove linetype legend title
  ) +
  theme_classic() +
  theme(
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
    panel.background = element_blank(),
    axis.line = element_line(color = "black"),
    legend.position = c(.999, .999),
    legend.justification = c("right", "top"),
    legend.background = element_rect(color = "black", fill = "white", size = 0.5),
    legend.box.background = element_blank(),
    legend.box.margin = margin(0, 0, 0, 0)
  )

# Stack the baseline and corrected density plots vertically.
plot_grid(distribution_plot_baseline, distribution_plot_corrected, ncol = 1, align = 'v')